# -*- coding: utf-8 -*-
import scrapy
from ..items import Shici300Item


class ShiciSpider(scrapy.Spider):
    """Spider that crawls classical-poem pages from gushimi.org.

    Flow: list page -> per-poem detail page -> yield a Shici300Item
    with title, author, and content.
    """

    name = 'shici'
    allowed_domains = ['gushimi.org']

    def start_requests(self):
        """Yield the initial request for the first poem-list page."""
        yield scrapy.Request('https://www.gushimi.org/gushi/', self.parse)
        # Pagination beyond page 1 is deliberately disabled; re-enable to
        # crawl pages 2..19 of the list index.
        # for i in range(2, 20):
        #     yield scrapy.Request('https://www.gushimi.org/gushi/index_' + str(i) + '.html', self.parse)

    def parse(self, response):
        """Extract detail-page links from a list page and follow each one.

        @param response: the list-page response
        @return: yields Requests handled by :meth:`detail_parse`
        """
        hrefs = response.xpath(
            "//div[@class='news_box']/div[@class='news_title']/a/@href"
        ).getall()
        for href in hrefs:
            # Skip empty hrefs. NOTE: the original used `href is not ''`,
            # an identity check that only worked via string interning.
            if href:
                # urljoin handles both relative and absolute hrefs.
                yield scrapy.Request(response.urljoin(href),
                                     callback=self.detail_parse)

    def detail_parse(self, response):
        """Scrape one poem detail page into a Shici300Item.

        @param response: the detail-page response
        @return: yields a populated Shici300Item
        """
        item = Shici300Item()
        main_body = response.xpath("//div[@class='content_box']")[0]

        item['title'] = main_body.xpath(
            "./div[@class='box_title']/h2/text()").get()
        # The author line is split across two <a> tags (dynasty + name);
        # join defensively so a missing tag doesn't raise IndexError.
        author_parts = main_body.xpath(
            "./div[@class='news_content']/div/a/text()").getall()
        item['author'] = ''.join(author_parts[:2])
        # string() flattens the nested markup of the poem body into plain text.
        item['content'] = main_body.xpath(
            "string(./div[@class='news_content']/div[@class='newstext'])"
        ).get()
        yield item
